import scrapy
from selenium import webdriver
from scrapy_study2.items import ScrapyStudy2Item
class WangyiSpider(scrapy.Spider):
    """Spider that walks selected news.163.com sections and yields news items.

    Flow: ``parse`` picks section links from the home page, ``parse_model``
    extracts headline/detail-link pairs from each section page, and
    ``parse_detail`` attaches the article text and yields the finished item.

    A Selenium Chrome driver is created per spider instance and exposed as
    ``self.bro``; it is shut down in ``close``.
    """

    name = 'wangyi'
    # allowed_domains = ['news.163.com']
    start_urls = ['http://news.163.com/']

    # URLs of the section pages selected in ``parse``.
    # NOTE(review): presumably read by a downloader middleware to decide which
    # responses need browser rendering -- that code is not visible here.
    # The class-level list is kept for backward compatibility; each instance
    # gets its own list in ``__init__`` so spiders do not share state.
    models_urls = []

    # Zero-based positions, within the home-page navigation <li> list, of the
    # five sections to crawl.
    MODEL_INDEXES = (3, 4, 6, 7, 8)

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and start the Selenium browser.

        Positional and keyword arguments are forwarded to ``scrapy.Spider`` so
        that spider arguments (e.g. ``scrapy crawl wangyi -a key=value``) do
        not break construction.
        """
        super().__init__(*args, **kwargs)
        # Per-instance list; avoids mutating the shared class attribute.
        self.models_urls = []
        # Browser instance used for the dynamically loaded section pages.
        self.bro = webdriver.Chrome('scrapy_study2/spiders/driver/chromedriver')

    def parse(self, response):
        """Collect the section URLs from the home page and request each one.

        Sections whose navigation entry is missing or has no link are skipped
        with a warning instead of aborting the whole crawl.
        """
        li_list = response.xpath('//*[@id="index2016_wrap"]/div[1]/div[2]/div[2]/div[2]/div[2]/div/ul/li')
        found_urls = []
        for index in self.MODEL_INDEXES:
            if index >= len(li_list):
                self.logger.warning(
                    'Navigation entry %d not found on %s', index, response.url)
                continue
            model_url = li_list[index].xpath("./a/@href").extract_first()
            if not model_url:
                self.logger.warning(
                    'Navigation entry %d has no link on %s', index, response.url)
                continue
            found_urls.append(model_url)

        # Register every section URL before any request is yielded, matching
        # the original ordering (all URLs recorded, then requested).
        self.models_urls.extend(found_urls)

        # Request each section page in turn.
        for url in found_urls:
            yield scrapy.Request(url=url, callback=self.parse_model)

    def parse_model(self, response):
        """Extract headline titles and detail links from a section page.

        The headline content of each section page is loaded dynamically.
        For every headline that has a link, a request for the detail page is
        yielded with a partially filled item carried in ``meta['item']``.
        """
        div_list = response.xpath('/html/body/div/div[3]/div[4]/div[1]/div/div/ul/li/div/div')
        for div in div_list:
            title = div.xpath('./div/div[1]/h3/a/text()').extract_first()
            new_detail_url = div.xpath('./div/div[1]/h3/a/@href').extract_first()
            if not new_detail_url:
                # Entries without a link cannot be followed; a request with
                # url=None would raise and abort the rest of the section.
                continue
            item = ScrapyStudy2Item()
            item['title'] = title
            # Request the news detail page; urljoin resolves relative hrefs
            # and leaves absolute URLs pointing at the same target.
            yield scrapy.Request(
                url=response.urljoin(new_detail_url),
                callback=self.parse_detail,
                meta={'item': item},
            )

    def parse_detail(self, response):
        """Fill in the article text on the item passed via ``meta`` and yield it."""
        content = response.xpath('//*[@id="content"]/div[2]//text()').extract()
        item = response.meta['item']
        item['content'] = ''.join(content)
        yield item

    def close(self, spider):
        """Quit the Selenium browser started in ``__init__``.

        NOTE(review): presumably invoked by Scrapy when the spider is closed;
        the call site is not visible in this file.
        """
        self.bro.quit()

